package org.p7h.storm.wordcount.topology;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.utils.Utils;
import org.p7h.storm.wordcount.bolts.WordCountBolt;
import org.p7h.storm.wordcount.bolts.WordSplitBolt;
import org.p7h.storm.wordcount.spouts.TwitterSpout;
import org.p7h.storm.wordcount.utils.Constants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Orchestrates the elements and forms a Topology to count the words present in Tweets.
 *
 * <p>The topology wires a {@link TwitterSpout} into a {@link WordSplitBolt}, whose output is
 * fed into a {@link WordCountBolt}. When a topology name is passed as the first command-line
 * argument the topology is submitted through {@link StormSubmitter}; otherwise it is run in an
 * in-process {@link LocalCluster} for a fixed amount of time and then torn down.
 *
 * @author - Prashanth Babu
 */
public final class WordCountTopology {
	private static final Logger LOGGER = LoggerFactory.getLogger(WordCountTopology.class);

	/** Tuple message timeout, in seconds, applied to the topology configuration. */
	private static final int MESSAGE_TIMEOUT_SECS = 120;
	/** Minimum word length passed to {@link WordSplitBolt}; used to ignore short, very common words. */
	private static final int MIN_WORD_LENGTH = 4;
	/** Frequency of logging [in seconds] passed to {@link WordCountBolt}. */
	private static final int LOG_INTERVAL_SECS = 30;
	/** Count threshold of words passed to {@link WordCountBolt}. */
	private static final int WORD_COUNT_THRESHOLD = 9;
	/** Number of workers requested when submitting through {@link StormSubmitter}. */
	private static final int CLUSTER_WORKERS = 3;
	/** Maximum task parallelism used when running in the local cluster. */
	private static final int LOCAL_MAX_TASK_PARALLELISM = 10;
	/** How long the local cluster is kept running before it is shut down, in milliseconds. */
	private static final int LOCAL_RUN_MILLIS = 120 * 1000;

	/**
	 * Builds the word-count topology and either submits it or runs it locally.
	 *
	 * <p>Failures are logged rather than propagated, so this method never throws to its caller.
	 *
	 * @param args optional; when non-empty, {@code args[0]} is used as the name under which the
	 *             topology is submitted via {@link StormSubmitter}. When {@code null} or empty,
	 *             the topology runs in a {@link LocalCluster} under
	 *             {@link Constants#TOPOLOGY_NAME}.
	 */
	public static void main(final String[] args) {
		try {
			final Config config = new Config();
			config.setMessageTimeoutSecs(MESSAGE_TIMEOUT_SECS);
			config.setDebug(false);

			final TopologyBuilder topologyBuilder = new TopologyBuilder();
			topologyBuilder.setSpout("twitterspout", new TwitterSpout());
			//Create WordSplitBolt with minimum word length to be considered.
			//This is more to reduce the number of words to be processed i.e. for ignoring simple and most used words.
			topologyBuilder.setBolt("wordsplitbolt", new WordSplitBolt(MIN_WORD_LENGTH))
					.shuffleGrouping("twitterspout");
			//Create Bolt with the frequency of logging [in seconds] and count threshold of words.
			//NOTE(review): shuffle grouping is used here; if the parallelism of this bolt is ever
			//raised, a fields grouping on the word may be needed for correct counts -- confirm.
			topologyBuilder.setBolt("wordcountbolt", new WordCountBolt(LOG_INTERVAL_SECS, WORD_COUNT_THRESHOLD))
					.shuffleGrouping("wordsplitbolt");

			//Submit it to the cluster, or submit it locally
			if (null != args && 0 < args.length) {
				config.setNumWorkers(CLUSTER_WORKERS);
				StormSubmitter.submitTopology(args[0], config, topologyBuilder.createTopology());
			} else {
				config.setMaxTaskParallelism(LOCAL_MAX_TASK_PARALLELISM);
				final LocalCluster localCluster = new LocalCluster();
				try {
					localCluster.submitTopology(Constants.TOPOLOGY_NAME, config, topologyBuilder.createTopology());
					//Run this topology for 120 seconds so that we can complete processing of decent # of tweets.
					Utils.sleep(LOCAL_RUN_MILLIS);
					LOGGER.info("Shutting down the cluster...");
					localCluster.killTopology(Constants.TOPOLOGY_NAME);
				} finally {
					//Always release the in-process cluster, even if submit/sleep/kill failed.
					localCluster.shutdown();
				}
			}
		} catch (final AlreadyAliveException | InvalidTopologyException exception) {
			//Previously swallowed silently; surface the reason the topology was rejected.
			LOGGER.error("Could not submit the topology", exception);
		} catch (final Exception exception) {
			//Top-level boundary: log instead of swallowing so failures are diagnosable.
			LOGGER.error("Unexpected failure while running the topology", exception);
		}
		LOGGER.info("\n\n\n\t\t*****Please clean your temp folder \"{}\" now!!!*****", System.getProperty("java.io.tmpdir"));
	}
}